capture log close
clear all
log using lehd_college_migration_v5.txt, text replace

* ==============================================================================================
* lehd_college_migration_v5.do
* ==============================================================================================
* This program simulates labor force participation, earnings, and migration decisions using 
* a stylized model. 
* The goal is to determine the conditions under which out-of-state migration will cause biased
* estimates of treatment effects (e.g. college quality or STEM major)

* Created: 10/7/2018
* Updated: 10/15/2018
* Updated: 11/15/2018 adding bounding exercise
* Updated: 12/12/2018 continue bounding exercise
* Updated: 1/15/2019 alternative model that lets treatment affect variance of earnings shocks
* Updated: 2/14/2019 modifications to bounding exercise
* Updated 7/1/2025 final run through

* This program has several big chunks of code, which can be turned on and off:
* run_baseloops = runs our baseline model, looping over  values for beta_out_treat --> lehd_college_migration_results_model1.dta
* run_exogloops = runs model with exogenous migration, looping over  values for beta_out_treat --> lehd_college_migration_results_model2.dta
* run_varoutloops = runs our model letting the variance differ between in/out of state --> lehd_college_migration_results_model3.dta
* makeoutput = makes the output graphs (combines lehd_college_migration_results_model1,2,3.dta)
* run_leebounds = evaluates lee bound for our baseline model

* Maybe we should do with with moving as a normal variable, so it can have negatives. Right now 
* nobody moves if earnings out of state are lower than in-state. 

* Another thought: should we explicitly calculate indiv-specific true treatment effects? ;
* We can do this, though maybe it is not too interesting because there isnt much true heterogeneity. The heterogeneity comes;
* from the heterogeneity in res wage and moving cost. ;


*====================================================================================================

#delim ;

* Set locals indicating which parts of the program to execute;
local run_baseloops = 1;
local run_exogloops = 1;
local run_varoutloops = 1;
local makeoutput = 1;
local run_leebounds = 1;


* Set parameters of data generating process for base model;

local beta_in_treat = 2000;
local beta_out_treat = 1.5; /*Set as a multiple of beta_in_treat*/
local beta_in_0 = 8000;
local beta_out_0 = 8000;
local sigma_ability = 2000;
local sigma_in = 1000;
local sigma_out = 1000;
local sigma_out_treat = 0.0; /*How much treatment increases variance of out-of-state earnings shocks as multiple of sigma_out*/
local res_wage_max = 8000;
local move_cost_max = 3000;
local treat_share = 0.50;



/*
post `coef' (0) ("Full") ("Mean") ("Endogenous")  (`beta_in_treat') (`beta_out_treat') (`sigma_out_treat') (`move_share') (`dont_work_share') (`coeff_move_treat') (`coeff_move_earn') (`coeff_earn_move') (`coeff_act') (`coeff_obs') (`bias');
*/

if `run_baseloops' == 1 {;

/* ================================== Base Model ============================================
* Moving is endogenous to earnings differential
* Loop over different values of the treatment effect on out-of-state earnings relative to in-state earnings
* ==============================================================================================
*/
clear;
* First assign treatment status randomly;
set obs 100000;
set seed 12345;
gen temp = runiform();
gen treat = temp > `treat_share';
drop temp;

* Set up file for results to be posted to;

tempfile results;
tempname coef;

postfile `coef' decile str10 sample str15 moment str15 move beta_in_treat beta_out_treat sigma_out_treat move_share dont_work_share coeff_move_treat coeff_move_earn coeff_earn_move coeff_act coeff_obs bias using `results', replace;


foreach beta_out_treat of numlist 0(.1)2  {;
display "==================================================================================";
display "========= Endogenous Migration, beta_out_treat = `beta_out_treat', sigma_out_treat = `sigma_out_treat' ====================== ";
display "==================================================================================";

set seed 11111 ;

* Give each worker an ability draw, earnings in/out draws, res wage draw, and moving cost draw ;
gen ability = `sigma_ability'*rnormal();
gen earn_in =  `beta_in_0'  + ability + (`beta_in_treat')*treat						+ (`sigma_in')*rnormal() ;
gen earn_out = `beta_out_0' + ability + (`beta_out_treat'*`beta_in_treat')*treat 	+ (`sigma_out')*(1+`sigma_out_treat'*treat)*rnormal() ;

gen res_wage = `res_wage_max'*runiform();
gen move_cost = `move_cost_max'*runiform();

/*
* Now simulate working in state, working out-of-state, not working
* There are three states: work in-state, work out-of-state, do not work (in-state)
* We apply the res wage to both the in- and out-state wage offer, irrespective of moving cost.
* Earnings in and earnings out should be set to zero if lower than res wage. 
* THis will make it so that nobody that moves out of state will choose not to work
*/

gen earn_in_trunct = earn_in*(earn_in>res_wage);
gen earn_out_trunct = earn_out*(earn_out>res_wage);

gen earn_diff = earn_out_trunct - earn_in_trunct;
gen move = (earn_diff > move_cost);
gen dont_move = 1-move;

gen earn_actual = earn_in_trunct*dont_move + earn_out_trunct*move;
gen work_actual = earn_actual > 0;
gen dont_work_actual = 1 - work_actual;

gen earn_observed = earn_actual*dont_move + 0*move;
gen work_observed = earn_observed > 0;
gen dont_work_observed = 1 - work_observed;

gen learn_actual = log(earn_actual) ;
gen learn_observed = log(earn_observed) ;

* Run regressions that correlate moving with treatment and earnings;
reg move treat ;
local coeff_move_treat = _b[treat] ;

reg move earn_actual ;
local coeff_move_earn = (_b[earn_actual])*1000;

reg earn_actual move ;
local coeff_earn_move = _b[move];

* Calculate move rates by earnings percentiles;
xtile earn_actual_decile = earn_actual, n(10);
foreach q of numlist 1/10 {;
	
	qui sum move if earn_actual_decile == `q';
	local move_share = r(mean);
	qui sum dont_work_actual if earn_actual_decile == `q';
	local dont_work_share = r(mean);	
	post `coef' (`q') ("Full") ("EarnDec") ("Endogenous")  (`beta_in_treat') (`beta_out_treat') (`sigma_out_treat') (`move_share') (`dont_work_share') (`coeff_move_treat') (`coeff_move_earn') (`coeff_earn_move') (0) (0) (0);

	qui sum move if earn_actual_decile == `q' & treat == 0;
	local move_share = r(mean);
	qui sum dont_work_actual if earn_actual_decile == `q' & treat == 0;
	local dont_work_share = r(mean);	
	post `coef' (`q') ("Untreated") ("EarnDec") ("Endogenous")  (`beta_in_treat') (`beta_out_treat') (`sigma_out_treat') (`move_share') (`dont_work_share') (`coeff_move_treat') (`coeff_move_earn') (`coeff_earn_move') (0) (0) (0);

	
	qui sum move if earn_actual_decile == `q' & treat == 1;
	local move_share = r(mean);
	qui sum dont_work_actual if earn_actual_decile == `q' & treat == 1;
	local dont_work_share = r(mean);	
	post `coef' (`q') ("Treated") ("EarnDec") ("Endogenous")  (`beta_in_treat') (`beta_out_treat') (`sigma_out_treat') (`move_share') (`dont_work_share') (`coeff_move_treat') (`coeff_move_earn') (`coeff_earn_move') (0) (0) (0);
	
};


qui sum move;
local move_share = r(mean);
qui sum dont_work_actual;
local dont_work_share = r(mean);

* Impossible regression: latent earnings offers ;
reg earn_in treat, robust ;
reg earn_out treat, robust ;

* Most naive: regress observed earnings on treatment (including zeros) ;
reg earn_actual treat, robust ;
local coeff_act = _b[treat] ;
reg earn_observed treat, robust ;
local coeff_obs = _b[treat] ;
local bias = `coeff_obs' - `coeff_act' ;
post `coef' (0) ("Full") ("Level Mean") ("Endogenous")  (`beta_in_treat') (`beta_out_treat')  (`sigma_out_treat') (`move_share') (`dont_work_share') (`coeff_move_treat') (`coeff_move_earn') (`coeff_earn_move') (`coeff_act') (`coeff_obs') (`bias');

* Restrict to non-zero earnings (actual and observed);
reg earn_actual treat if earn_actual > 0, robust ;
local coeff_act = _b[treat] ;
reg earn_observed treat if earn_observed > 0, robust ;
local coeff_obs = _b[treat] ;
local bias = `coeff_obs' - `coeff_act' ;
post `coef' (0) ("Earn > 0") ("Level Mean") ("Endogenous")  (`beta_in_treat') (`beta_out_treat')  (`sigma_out_treat') (`move_share') (`dont_work_share') (`coeff_move_treat') (`coeff_move_earn') (`coeff_earn_move') (`coeff_act') (`coeff_obs') (`bias');

* Restrict to non-sero earnings (actual and observed), but use logs;
reg learn_actual treat, robust ;
local coeff_act = _b[treat] ;
reg learn_observed treat, robust ;
local coeff_obs = _b[treat] ;
local bias = `coeff_obs' - `coeff_act' ;
post `coef' (0) ("Earn > 0") ("Log Mean") ("Endogenous")  (`beta_in_treat') (`beta_out_treat') (`sigma_out_treat')  (`move_share') (`dont_work_share') (`coeff_move_treat') (`coeff_move_earn') (`coeff_earn_move') (`coeff_act') (`coeff_obs') (`bias');


* Quantile regressions with log earnings;
qreg learn_actual treat, q(10)  ;
local coeff_act = _b[treat] ;
qreg learn_observed treat, q(10)  ;
local coeff_obs = _b[treat] ;
local bias = `coeff_obs' - `coeff_act' ;
post `coef' (0) ("Earn > 0") ("Log p10") ("Endogenous")  (`beta_in_treat') (`beta_out_treat') (`sigma_out_treat')  (`move_share') (`dont_work_share') (`coeff_move_treat') (`coeff_move_earn') (`coeff_earn_move') (`coeff_act') (`coeff_obs') (`bias');


qreg learn_actual treat, q(50) ;
local coeff_act = _b[treat] ;
qreg learn_observed treat, q(50) ;
local coeff_obs = _b[treat] ;
local bias = `coeff_obs' - `coeff_act' ;
post `coef' (0) ("Earn > 0") ("Log p50") ("Endogenous")  (`beta_in_treat') (`beta_out_treat')  (`sigma_out_treat') (`move_share') (`dont_work_share') (`coeff_move_treat') (`coeff_move_earn') (`coeff_earn_move') (`coeff_act') (`coeff_obs') (`bias');


qreg learn_actual treat, q(90) ;
local coeff_act = _b[treat] ;
qreg learn_observed treat, q(90) ;
local coeff_obs = _b[treat] ;
local bias = `coeff_obs' - `coeff_act' ;
post `coef' (0) ("Earn > 0") ("Log p90") ("Endogenous")  (`beta_in_treat') (`beta_out_treat') (`sigma_out_treat')  (`move_share') (`dont_work_share') (`coeff_move_treat') (`coeff_move_earn') (`coeff_earn_move') (`coeff_act') (`coeff_obs') (`bias');

drop ability *earn* res_wage *move* *work* ;

};
* End of loop over beta_out_treat values;
local beta_out_treat = 1.5;

postclose `coef';
use `results',clear;
save lehd_college_migration_results_model1.dta, replace;

}; 
* End of run_baseloops commands;

 if `run_exogloops' == 1 {;
 
/* ================================== Model two ============================================
* Moving is exogenous to earnings differential and treatment (75% move randomly)
* Loop over different values of the treatment effect on out-of-state earnings relative to in-state earnings
* ==============================================================================================
*/
clear;
* First assign treatment status randomly;
set obs 100000;
set seed 12345;
gen temp = runiform();
gen treat = temp > `treat_share';
drop temp;

* Set up file for results to be posted to;

tempfile results;
tempname coef;

postfile `coef' decile str10 sample str15 moment str15 move beta_in_treat beta_out_treat sigma_out_treat move_share dont_work_share coeff_move_treat coeff_move_earn coeff_earn_move coeff_act coeff_obs bias using `results', replace;

foreach beta_out_treat of numlist 0(.1)2 {;
display "==================================================================================";
display "========= Exogenous Migration, beta_out_treat = `beta_out_treat', sigma_out_treat = `sigma_out_treat' ====================== ";
display "==================================================================================";

set seed 11111 ;

gen ability = `sigma_ability'*rnormal();
gen earn_in =  `beta_in_0'  + ability + (`beta_in_treat')*treat						+ (`sigma_in')*rnormal() ;
gen earn_out = `beta_out_0' + ability + (`beta_out_treat'*`beta_in_treat')*treat 	+ (`sigma_out')*(1+`sigma_out_treat'*treat)*rnormal() ;

gen res_wage = `res_wage_max'*runiform();
gen move_cost = `move_cost_max'*runiform();

/*
* Now simulate working in state, working out-of-state, not working
* There are three states: work in-state, work out-of-state, do not work (in-state)
* We apply the res wage before the move.
* Earnings in and earnings out should be set to zero if lower than res wage. 
* THis will make it so that nobody that moves out of state will choose not to work
*/

gen earn_in_trunct = earn_in*(earn_in>res_wage);
gen earn_out_trunct = earn_out*(earn_out>res_wage);

gen earn_diff = earn_out_trunct - earn_in_trunct;
gen temp = runiform();
gen move = temp > .75;
gen dont_move = 1-move;
drop temp;

gen earn_actual = earn_in_trunct*dont_move + earn_out_trunct*move;
gen work_actual = earn_actual > 0;
gen dont_work_actual = 1 - work_actual;

gen earn_observed = earn_actual*dont_move + 0*move;
gen work_observed = earn_observed > 0;
gen dont_work_observed = 1 - work_observed;

gen learn_actual = log(earn_actual) ;
gen learn_observed = log(earn_observed) ;

* Run regressions that correlate moving with treatment and earnings;
reg move treat ;
local coeff_move_treat = _b[treat] ;

reg move earn_actual ;
local coeff_move_earn = (_b[earn_actual])*1000;

reg earn_actual move ;
local coeff_earn_move = _b[move];

xtile earn_actual_decile = earn_actual, n(10);
foreach q of numlist 1/10 {;

	qui sum move if earn_actual_decile == `q';
	local move_share = r(mean);
	qui sum dont_work_actual if earn_actual_decile == `q';
	local dont_work_share = r(mean);	
	post `coef' (`q') ("Full") ("EarnDec") ("Exogenous")  (`beta_in_treat') (`beta_out_treat')  (`sigma_out_treat') (`move_share') (`dont_work_share') (`coeff_move_treat') (`coeff_move_earn') (`coeff_earn_move') (0) (0) (0);

	qui sum move if earn_actual_decile == `q' & treat == 0;
	local move_share = r(mean);
	qui sum dont_work_actual if earn_actual_decile == `q' & treat == 0;
	local dont_work_share = r(mean);	
	post `coef' (`q') ("Untreated") ("EarnDec") ("Exogenous")  (`beta_in_treat') (`beta_out_treat')  (`sigma_out_treat') (`move_share') (`dont_work_share') (`coeff_move_treat') (`coeff_move_earn') (`coeff_earn_move') (0) (0) (0);

	
	qui sum move if earn_actual_decile == `q' & treat == 1;
	local move_share = r(mean);
	qui sum dont_work_actual if earn_actual_decile == `q' & treat == 1;
	local dont_work_share = r(mean);	
	post `coef' (`q') ("Treated") ("EarnDec") ("Exogenous")  (`beta_in_treat') (`beta_out_treat')  (`sigma_out_treat') (`move_share') (`dont_work_share') (`coeff_move_treat') (`coeff_move_earn') (`coeff_earn_move') (0) (0) (0);

	
	
	};


qui sum move;
local move_share = r(mean);
qui sum dont_work_actual;
local dont_work_share = r(mean);

* Impossible regression: latent earnings offers ;
reg earn_in treat, robust ;
reg earn_out treat, robust ;

* Most naive: regress observed earnings on treatment (including zeros) ;
reg earn_actual treat, robust ;
local coeff_act = _b[treat] ;
reg earn_observed treat, robust ;
local coeff_obs = _b[treat] ;
local bias = `coeff_obs' - `coeff_act' ;
post `coef' (0) ("Full") ("Level Mean") ("Exogenous")  (`beta_in_treat') (`beta_out_treat')  (`sigma_out_treat') (`move_share') (`dont_work_share') (`coeff_move_treat') (`coeff_move_earn') (`coeff_earn_move') (`coeff_act') (`coeff_obs') (`bias');


* Restrict to non-zero earnings (actual and observed);
reg earn_actual treat if earn_actual > 0, robust ;
local coeff_act = _b[treat] ;
reg earn_observed treat if earn_observed > 0, robust ;
local coeff_obs = _b[treat] ;
local bias = `coeff_obs' - `coeff_act' ;
post `coef' (0) ("Earn > 0") ("Level Mean") ("Exogenous")  (`beta_in_treat') (`beta_out_treat')  (`sigma_out_treat') (`move_share') (`dont_work_share') (`coeff_move_treat') (`coeff_move_earn') (`coeff_earn_move') (`coeff_act') (`coeff_obs') (`bias');



* Restrict to non-sero earnings (actual and observed), but use logs;
reg learn_actual treat, robust ;
local coeff_act = _b[treat] ;
reg learn_observed treat, robust ;
local coeff_obs = _b[treat] ;
local bias = `coeff_obs' - `coeff_act' ;
post `coef' (0) ("Earn > 0") ("Log Mean") ("Exogenous")  (`beta_in_treat') (`beta_out_treat') (`sigma_out_treat')  (`move_share') (`dont_work_share') (`coeff_move_treat') (`coeff_move_earn') (`coeff_earn_move') (`coeff_act') (`coeff_obs') (`bias');


* Quantile regressions with log earnings;
qreg learn_actual treat, q(10)  ;
local coeff_act = _b[treat] ;
qreg learn_observed treat, q(10)  ;
local coeff_obs = _b[treat] ;
local bias = `coeff_obs' - `coeff_act' ;
post `coef' (0) ("Earn > 0") ("Log p10") ("Exogenous")  (`beta_in_treat') (`beta_out_treat')  (`sigma_out_treat') (`move_share') (`dont_work_share') (`coeff_move_treat') (`coeff_move_earn') (`coeff_earn_move') (`coeff_act') (`coeff_obs') (`bias');


qreg learn_actual treat, q(50) ;
local coeff_act = _b[treat] ;
qreg learn_observed treat, q(50) ;
local coeff_obs = _b[treat] ;
local bias = `coeff_obs' - `coeff_act' ;
post `coef' (0) ("Earn > 0") ("Log p50") ("Exogenous")  (`beta_in_treat') (`beta_out_treat')  (`sigma_out_treat') (`move_share') (`dont_work_share') (`coeff_move_treat') (`coeff_move_earn') (`coeff_earn_move') (`coeff_act') (`coeff_obs') (`bias');


qreg learn_actual treat, q(90) ;
local coeff_act = _b[treat] ;
qreg learn_observed treat, q(90) ;
local coeff_obs = _b[treat] ;
local bias = `coeff_obs' - `coeff_act' ;
post `coef' (0) ("Earn > 0") ("Log p90") ("Exogenous")  (`beta_in_treat') (`beta_out_treat')  (`sigma_out_treat') (`move_share') (`dont_work_share') (`coeff_move_treat') (`coeff_move_earn') (`coeff_earn_move') (`coeff_act') (`coeff_obs') (`bias');

drop ability *earn* res_wage *move* *work* ;

};
* End of loop over beta_out_treat values;
local beta_out_treat = 1.5;

postclose `coef';
use `results',clear;
save lehd_college_migration_results_model2.dta, replace;

}; 
* End of run_exogloops commands;

if  `run_varoutloops' == 1 {;

/* ================================== Model Three: Outside Variance Effect ============================================
* Moving is endogenous to earnings differential
* Loop over different values of the treatment effect on out-of-state earnings relative to in-state earnings
* ==============================================================================================
*/
clear;
* First assign treatment status randomly;
set obs 100000;
set seed 12345;
gen temp = runiform();
gen treat = temp > `treat_share';
drop temp;

* Set up file for results to be posted to;

tempfile results;
tempname coef;

postfile `coef' decile str10 sample str15 moment str15 move beta_in_treat beta_out_treat sigma_out_treat move_share dont_work_share coeff_move_treat coeff_move_earn coeff_earn_move coeff_act coeff_obs bias using `results', replace;


foreach spec of numlist 1(1)4  {;


if `spec' == 1 {;
	local beta_out_treat = 1;
	local sigma_out_treat = 0.5;
	};
if `spec' == 2 {;
	local beta_out_treat = 1;
	local sigma_out_treat = -0.5;
	};
	
if `spec' == 3 {;
	local beta_out_treat = 1.5;
	local sigma_out_treat = 0.5;
	};
if `spec' == 4 {;
	local beta_out_treat = 1.5;
	local sigma_out_treat = -0.5;
	};
		

	
display "==================================================================================";
display "========= Exogenous Migration, beta_out_treat = `beta_out_treat', sigma_out_treat = `sigma_out_treat' ====================== ";
display "==================================================================================";

set seed 11111 ;

* Give each worker an ability draw, earnings in/out draws, res wage draw, and moving cost draw ;
gen ability = `sigma_ability'*rnormal();
gen earn_in =  `beta_in_0'  + ability + (`beta_in_treat')*treat						+ (`sigma_in')*rnormal() ;
gen earn_out = `beta_out_0' + ability + (`beta_out_treat'*`beta_in_treat')*treat 	+ (`sigma_out')*(1+`sigma_out_treat'*treat)*rnormal() ;

gen res_wage = `res_wage_max'*runiform();
gen move_cost = `move_cost_max'*runiform();

/*
* Now simulate working in state, working out-of-state, not working
* There are three states: work in-state, work out-of-state, do not work (in-state)
* We apply the res wage to both the in- and out-state wage offer, irrespective of moving cost.
* Earnings in and earnings out should be set to zero if lower than res wage. 
* THis will make it so that nobody that moves out of state will choose not to work
*/

gen earn_in_trunct = earn_in*(earn_in>res_wage);
gen earn_out_trunct = earn_out*(earn_out>res_wage);

gen earn_diff = earn_out_trunct - earn_in_trunct;
gen move = (earn_diff > move_cost);
gen dont_move = 1-move;

gen earn_actual = earn_in_trunct*dont_move + earn_out_trunct*move;
gen work_actual = earn_actual > 0;
gen dont_work_actual = 1 - work_actual;

gen earn_observed = earn_actual*dont_move + 0*move;
gen work_observed = earn_observed > 0;
gen dont_work_observed = 1 - work_observed;

gen learn_actual = log(earn_actual) ;
gen learn_observed = log(earn_observed) ;

* Run regressions that correlate moving with treatment and earnings;
reg move treat ;
local coeff_move_treat = _b[treat] ;

reg move earn_actual ;
local coeff_move_earn = (_b[earn_actual])*1000;

reg earn_actual move ;
local coeff_earn_move = _b[move];

* Calculate move rates by earnings percentiles;
xtile earn_actual_decile = earn_actual, n(10);
foreach q of numlist 1/10 {;
	
	qui sum move if earn_actual_decile == `q';
	local move_share = r(mean);
	qui sum dont_work_actual if earn_actual_decile == `q';
	local dont_work_share = r(mean);	
	post `coef' (`q') ("Full") ("EarnDec") ("Endogenous")  (`beta_in_treat') (`beta_out_treat')  (`sigma_out_treat') (`move_share') (`dont_work_share') (`coeff_move_treat') (`coeff_move_earn') (`coeff_earn_move') (0) (0) (0);

	qui sum move if earn_actual_decile == `q' & treat == 0;
	local move_share = r(mean);
	qui sum dont_work_actual if earn_actual_decile == `q' & treat == 0;
	local dont_work_share = r(mean);	
	post `coef' (`q') ("Untreated") ("EarnDec") ("Endogenous")  (`beta_in_treat') (`beta_out_treat') (`sigma_out_treat')  (`move_share') (`dont_work_share') (`coeff_move_treat') (`coeff_move_earn') (`coeff_earn_move') (0) (0) (0);

	
	qui sum move if earn_actual_decile == `q' & treat == 1;
	local move_share = r(mean);
	qui sum dont_work_actual if earn_actual_decile == `q' & treat == 1;
	local dont_work_share = r(mean);	
	post `coef' (`q') ("Treated") ("EarnDec") ("Endogenous")  (`beta_in_treat') (`beta_out_treat')  (`sigma_out_treat') (`move_share') (`dont_work_share') (`coeff_move_treat') (`coeff_move_earn') (`coeff_earn_move') (0) (0) (0);
	
};


qui sum move;
local move_share = r(mean);
qui sum dont_work_actual;
local dont_work_share = r(mean);

* Impossible regression: latent earnings offers ;
reg earn_in treat, robust ;
reg earn_out treat, robust ;

* Most naive: regress observed earnings on treatment (including zeros) ;
reg earn_actual treat, robust ;
local coeff_act = _b[treat] ;
reg earn_observed treat, robust ;
local coeff_obs = _b[treat] ;
local bias = `coeff_obs' - `coeff_act' ;
post `coef' (0) ("Full") ("Level Mean") ("Endogenous")  (`beta_in_treat') (`beta_out_treat') (`sigma_out_treat')  (`move_share') (`dont_work_share') (`coeff_move_treat') (`coeff_move_earn') (`coeff_earn_move') (`coeff_act') (`coeff_obs') (`bias');

* Restrict to non-zero earnings (actual and observed);
reg earn_actual treat if earn_actual > 0, robust ;
local coeff_act = _b[treat] ;
reg earn_observed treat if earn_observed > 0, robust ;
local coeff_obs = _b[treat] ;
local bias = `coeff_obs' - `coeff_act' ;
post `coef' (0) ("Earn > 0") ("Level Mean") ("Endogenous")  (`beta_in_treat') (`beta_out_treat') (`sigma_out_treat')  (`move_share') (`dont_work_share') (`coeff_move_treat') (`coeff_move_earn') (`coeff_earn_move') (`coeff_act') (`coeff_obs') (`bias');

* Restrict to non-sero earnings (actual and observed), but use logs;
reg learn_actual treat, robust ;
local coeff_act = _b[treat] ;
reg learn_observed treat, robust ;
local coeff_obs = _b[treat] ;
local bias = `coeff_obs' - `coeff_act' ;
post `coef' (0) ("Earn > 0") ("Log Mean") ("Endogenous")  (`beta_in_treat') (`beta_out_treat') (`sigma_out_treat')  (`move_share') (`dont_work_share') (`coeff_move_treat') (`coeff_move_earn') (`coeff_earn_move') (`coeff_act') (`coeff_obs') (`bias');


* Quantile regressions with log earnings;
qreg learn_actual treat, q(10)  ;
local coeff_act = _b[treat] ;
qreg learn_observed treat, q(10)  ;
local coeff_obs = _b[treat] ;
local bias = `coeff_obs' - `coeff_act' ;
post `coef' (0) ("Earn > 0") ("Log p10") ("Endogenous")  (`beta_in_treat') (`beta_out_treat')  (`sigma_out_treat') (`move_share') (`dont_work_share') (`coeff_move_treat') (`coeff_move_earn') (`coeff_earn_move') (`coeff_act') (`coeff_obs') (`bias');


qreg learn_actual treat, q(50) ;
local coeff_act = _b[treat] ;
qreg learn_observed treat, q(50) ;
local coeff_obs = _b[treat] ;
local bias = `coeff_obs' - `coeff_act' ;
post `coef' (0) ("Earn > 0") ("Log p50") ("Endogenous")  (`beta_in_treat') (`beta_out_treat')  (`sigma_out_treat') (`move_share') (`dont_work_share') (`coeff_move_treat') (`coeff_move_earn') (`coeff_earn_move') (`coeff_act') (`coeff_obs') (`bias');


qreg learn_actual treat, q(90) ;
local coeff_act = _b[treat] ;
qreg learn_observed treat, q(90) ;
local coeff_obs = _b[treat] ;
local bias = `coeff_obs' - `coeff_act' ;
post `coef' (0) ("Earn > 0") ("Log p90") ("Endogenous")  (`beta_in_treat') (`beta_out_treat') (`sigma_out_treat')  (`move_share') (`dont_work_share') (`coeff_move_treat') (`coeff_move_earn') (`coeff_earn_move') (`coeff_act') (`coeff_obs') (`bias');

drop ability *earn* res_wage *move* *work* ;

};
local sigma_out_treat = 0;
local beta_out_treat = 1.5;
* End of loop over beta_out_treat values;

postclose `coef';
use `results',clear;
save lehd_college_migration_results_model3.dta, replace;

}; 

* End of run_varoutloops commands;





if `makeoutput' == 1 {; 

* =============================================================================================;
* Make figures and tables of simulation output;
* ==============================================================================================;

use lehd_college_migration_results_model1.dta, clear;
append using lehd_college_migration_results_model2.dta;
append using lehd_college_migration_results_model3.dta;

save lehd_college_migration_results_v5.dta, replace;

*Graphs with Endogenous Migration;

#delim ;
twoway  line move_share       decile if sample == "Full" & moment == "EarnDec" & move == "Endogenous" & beta_out_treat == 1.5 & sigma_out_treat == 0 ||
		line move_share       decile if sample == "Untreated" & moment == "EarnDec" & move == "Endogenous" & beta_out_treat == 1.5  & sigma_out_treat == 0 ||
		line move_share       decile if sample == "Treated" & moment == "EarnDec" & move == "Endogenous" & beta_out_treat == 1.5  & sigma_out_treat == 0 ||
		,
		ylabel(0(0.1)0.6)
		scheme(s2mono)  legend(label(1 "Full Sample") label(2 "UnTreated") label(3 "Treated") )
		title("Share Moving by Actual Earnings Decile")
		subtitle("Endogenous Migration, Relative Treatment Effect = 1.5")
		ytitle("Fraction") 
		xtitle("Actual Earnings Decile" )
		name(move_bydecile_endog_15, replace);
		graph export move_bydecile_endog_15.emf, replace;
		
twoway  line move_share       decile if sample == "Full" & moment == "EarnDec" & move == "Endogenous" & beta_out_treat == 1  & sigma_out_treat == 0 ||
		line move_share       decile if sample == "Untreated" & moment == "EarnDec" & move == "Endogenous" & beta_out_treat == 1  & sigma_out_treat == 0 ||
		line move_share       decile if sample == "Treated" & moment == "EarnDec" & move == "Endogenous" & beta_out_treat == 1  & sigma_out_treat == 0 ||
		,
		ylabel(0(0.1)0.6)
		scheme(s2mono)  legend(label(1 "Full Sample") label(2 "UnTreated") label(3 "Treated") )
		title("Share Moving by Actual Earnings Decile")
		subtitle("Endogenous Migration, Relative Treatment Effect = 1")
		ytitle("Fraction") 
		xtitle("Actual Earnings Decile" )
		name(move_bydecile_endog_1, replace);
		graph export move_bydecile_endog_1.emf, replace;		
		
#delim ;
twoway  line move_share       beta_out_treat if sample == "Full" & moment == "Level Mean" & move == "Endogenous"  & sigma_out_treat == 0 || 
		line coeff_move_treat beta_out_treat if sample == "Full" & moment == "Level Mean" & move == "Endogenous"  & sigma_out_treat == 0 ||
		line coeff_move_earn  beta_out_treat if sample == "Full" & moment == "Level Mean" & move == "Endogenous"  & sigma_out_treat == 0 ||
		,
		scheme(s2mono) legend(rows(3) label(1 "Share moving") label(2 "{&Delta}Pr(move) assoc w Treatment") label(3 "{&Delta}Pr(move) assoc w $1000 {&Delta}earnings") )
		title("Mobility Patterns with Enogenous Migration")
		ytitle("Fraction") 
		xtitle("Effect of Treatment on Out-of-state Earnings Relative to In-state Earnings" "(1 = no differential)")
		name(move_endog, replace);
		graph export move_endog.emf, replace;
		
#delim ;		
twoway line bias beta_out_treat if sample == "Full" & moment == "Level Mean" & move == "Endogenous"  & sigma_out_treat == 0 || 
		line bias beta_out_treat if sample == "Earn > 0" & moment == "Level Mean" & move == "Endogenous"  & sigma_out_treat == 0 || 
		,
		scheme(s2mono) legend(label(1 "Full Sample") label(2 "Earnings > 0") )
		title("Bias in Estimate of Treatment Effect with Endogenous Migration" "Mean Earnings Level")
		xtitle("Effect of Treatment on Out-of-state Earnings Relative to In-state Earnings" "(1 = no differential)")
		name(bias_level_endog, replace);
		graph export bias_level_endog.emf, replace;
		
#delim ;

twoway line bias beta_out_treat if sample == "Earn > 0" & moment == "Log Mean" & move == "Endogenous"  & sigma_out_treat == 0 || 
		line bias beta_out_treat if sample == "Earn > 0" & moment == "Log p10" & move == "Endogenous"  & sigma_out_treat == 0 || 
		line bias beta_out_treat if sample == "Earn > 0" & moment == "Log p50" & move == "Endogenous"  & sigma_out_treat == 0 || 
		line bias beta_out_treat if sample == "Earn > 0" & moment == "Log p90" & move == "Endogenous"  & sigma_out_treat == 0 ||
		,
		scheme(s2mono) legend(label(1 "Mean") label(2 "10th ptile") label(3 "Median") label(4 "90th ptile"))
		title("Bias in Estimate of Treatment Effect with Endogenous Migration" "Earnings > 0, Log Earnings")
		xtitle("Effect of Treatment on Out-of-state Earnings Relative to In-state Earnings" "(1 = no differential)")
		name(bias_log_endog, replace);
		graph export bias_log_endog.emf, replace;
		
*Graphs with Exogenous Migration;
#delim ;
twoway  line move_share       decile if sample == "Full" & moment == "EarnDec" & move == "Exogenous" & beta_out_treat == 1.5  & sigma_out_treat == 0 ||
		line move_share       decile if sample == "Untreated" & moment == "EarnDec" & move == "Exogenous" & beta_out_treat == 1.5  & sigma_out_treat == 0 ||
		line move_share       decile if sample == "Treated" & moment == "EarnDec" & move == "Exogenous" & beta_out_treat == 1.5  & sigma_out_treat == 0 ||
		,
		ylabel(0(0.1)0.6)
		scheme(s2mono)  legend(label(1 "Full Sample") label(2 "UnTreated") label(3 "Treated") )
		title("Share Moving by Actual Earnings Decile")
		subtitle("Exogenous Migration, Relative Treatment Effect = 1.5")
		ytitle("Fraction") 
		xtitle("Actual Earnings Decile" )
		name(move_bydecile_exog_15, replace);
		graph export move_bydecile_exog_15.emf, replace;
		
twoway  line move_share       decile if sample == "Full" & moment == "EarnDec" & move == "Exogenous" & beta_out_treat == 1  & sigma_out_treat == 0 ||
		line move_share       decile if sample == "Untreated" & moment == "EarnDec" & move == "Exogenous" & beta_out_treat == 1  & sigma_out_treat == 0 ||
		line move_share       decile if sample == "Treated" & moment == "EarnDec" & move == "Exogenous" & beta_out_treat == 1  & sigma_out_treat == 0 ||
		,
		ylabel(0(0.1)0.6)
		scheme(s2mono)  legend(label(1 "Full Sample") label(2 "UnTreated") label(3 "Treated") )
		title("Share Moving by Actual Earnings Decile")
		subtitle("Exogenous Migration, Relative Treatment Effect = 1")
		ytitle("Fraction") 
		xtitle("Actual Earnings Decile" )
		name(move_bydecile_exog_1, replace);
		graph export move_bydecile_exog_1.emf, replace;
		
#delim ;
twoway  line move_share       beta_out_treat if sample == "Full" & moment == "Level Mean" & move == "Exogenous"  & sigma_out_treat == 0 || 
		line coeff_move_treat beta_out_treat if sample == "Full" & moment == "Level Mean" & move == "Exogenous"  & sigma_out_treat == 0 ||
		line coeff_move_earn  beta_out_treat if sample == "Full" & moment == "Level Mean" & move == "Exogenous"  & sigma_out_treat == 0 ||
		,
		scheme(s2mono) legend(rows(3) label(1 "Share moving") label(2 "{&Delta}Pr(move) assoc w Treatment") label(3 "{&Delta}Pr(move) assoc w $1000 {&Delta}earnings") )
		title("Mobility Patterns with Exogenous Migration")
		ytitle("Fraction") 
		xtitle("Effect of Treatment on Out-of-state Earnings Relative to In-state Earnings" "(1 = no differential)")
		name(move_exog, replace);
		graph export move_exog.emf, replace;
		

	
#delim ;

twoway line bias beta_out_treat if sample == "Full" & moment == "Level Mean" & move == "Exogenous"  & sigma_out_treat == 0 || 
		line bias beta_out_treat if sample == "Earn > 0" & moment == "Level Mean" & move == "Exogenous"  & sigma_out_treat == 0 || 
		,
		scheme(s2mono) legend(label(1 "Full Sample") label(2 "Earnings > 0") )
		title("Bias in Estimate of Treatment Effect with Exogenous Migration" "Mean Earnings Level")
		xtitle("Effect of Treatment on Out-of-state Earnings Relative to In-state Earnings" "(1 = no differential)")
		name(bias_level_exog, replace);
		graph export bias_level_exog.emf, replace;
		
twoway line bias beta_out_treat if sample == "Earn > 0" & moment == "Log Mean" & move == "Exogenous"  & sigma_out_treat == 0 || 
		line bias beta_out_treat if sample == "Earn > 0" & moment == "Log p10" & move == "Exogenous"  & sigma_out_treat == 0 || 
		line bias beta_out_treat if sample == "Earn > 0" & moment == "Log p50" & move == "Exogenous"  & sigma_out_treat == 0 || 
		line bias beta_out_treat if sample == "Earn > 0" & moment == "Log p90" & move == "Exogenous"  & sigma_out_treat == 0 ||
		,
		scheme(s2mono) legend(label(1 "Mean") label(2 "10th ptile") label(3 "Median") label(4 "90th ptile"))
		title("Bias in Estimate of Treatment Effect with Exogenous Migration" "Earnings > 0, Log Earnings")
		xtitle("Effect of Treatment on Out-of-state Earnings Relative to In-state Earnings" "(1 = no differential)")
		name(bias_log_exog, replace);
		graph export bias_log_exog.emf, replace;		
graph drop _all;
*Export data for table;
#delim ;
gen keep = 0;
replace keep = 1 if moment != "EarnDec" & move == "Endogenous" & beta_out_treat == 1.5  & sigma_out_treat == 0 ;
replace keep = 1 if moment != "EarnDec" & move == "Endogenous" & beta_out_treat == 1  & sigma_out_treat == 0 ; 
replace keep = 1 if moment != "EarnDec" & move == "Exogenous" & beta_out_treat == 1.5 & sigma_out_treat == 0 ;
replace keep = 1 if moment != "EarnDec" & move == "Endogenous" & beta_out_treat == 1.5  & sigma_out_treat == -0.5 ;
replace keep = 1 if moment != "EarnDec" & move == "Endogenous" & beta_out_treat == 1.5  & sigma_out_treat == 0.5 ;
replace keep = 1 if moment != "EarnDec" & move == "Endogenous" & beta_out_treat == 1  & sigma_out_treat == -0.5 ;
replace keep = 1 if moment != "EarnDec" & move == "Endogenous" & beta_out_treat == 1  & sigma_out_treat == 0.5 ;
	

export excel using lehd_college_migration_v5.xls if keep ==1, replace firstrow(var) ;

};
* End of makeoutput commands ;

if `run_leebounds' == 1{;

/* ================================== Examine Performance of Lee Bounds ============================================
* Do this for base model:
* Moving is endogenous to earnings differential
* Treatment effect on out-of-state earnings relative to in-state earnings = 1.5
* No effect of treatment on out-of-state earnings variance
* ==============================================================================================
*/

clear all;
set obs 100000;


* First assign treatment status randomly;
set seed 12345;
gen temp = runiform();
gen treat = temp > `treat_share';
drop temp;


set seed 11111 ;

* Give each worker an ability draw, earnings in/out draws, res wage draw, and moving cost draw ;
gen ability = `sigma_ability'*rnormal();
gen earn_in =  `beta_in_0'  + ability + (`beta_in_treat')*treat						+ (`sigma_in')*rnormal() ;
gen earn_out = `beta_out_0' + ability + (`beta_out_treat'*`beta_in_treat')*treat 	+ (`sigma_out')*(1+`sigma_out_treat'*treat)*rnormal() ;

gen res_wage = `res_wage_max'*runiform();
gen move_cost = `move_cost_max'*runiform();

/*
* Now simulate working in state, working out-of-state, not working
* There are three states: work in-state, work out-of-state, do not work (in-state)
* We apply the res wage to both the in- and out-state wage offer, irrespective of moving cost.
* Earnings in and earnings out should be set to zero if lower than res wage. 
* THis will make it so that nobody that moves out of state will choose not to work
*/

gen earn_in_trunct = earn_in*(earn_in>res_wage);
gen earn_out_trunct = earn_out*(earn_out>res_wage);

gen earn_diff = earn_out_trunct - earn_in_trunct;
gen move = (earn_diff > move_cost);
gen dont_move = 1-move;

gen earn_actual = earn_in_trunct*dont_move + earn_out_trunct*move;
gen work_actual = earn_actual > 0;
gen dont_work_actual = 1 - work_actual;

gen earn_observed = earn_actual*dont_move + 0*move;
gen work_observed = earn_observed > 0;
gen dont_work_observed = 1 - work_observed;

gen learn_actual = log(earn_actual) ;
gen learn_observed = log(earn_observed) ;

* Impossible regression: latent earnings offers ;
reg earn_in treat, robust ;
reg earn_out treat, robust ;

* Most naive: regress observed earnings on treatment (including zeros) ;
reg earn_actual treat, robust ;
local coeff_act = _b[treat] ;
reg earn_observed treat, robust ;
local coeff_obs = _b[treat] ;
local bias = `coeff_obs' - `coeff_act' ;
*post `coef' (0) ("Full") ("Level Mean") ("Endogenous")  (`beta_in_treat') (`beta_out_treat')  (`sigma_out_treat') (`move_share') (`dont_work_share') (`coeff_move_treat') (`coeff_move_earn') (`coeff_earn_move') (`coeff_act') (`coeff_obs') (`bias');

* Restrict to non-zero earnings (actual and observed);
reg earn_actual treat if earn_actual > 0, robust ;
local coeff_act = _b[treat] ;
reg earn_observed treat if earn_observed > 0, robust ;
local coeff_obs = _b[treat] ;
local bias = `coeff_obs' - `coeff_act' ;
*post `coef' (0) ("Earn > 0") ("Level Mean") ("Endogenous")  (`beta_in_treat') (`beta_out_treat') (`sigma_out_treat')  (`move_share') (`dont_work_share') (`coeff_move_treat') (`coeff_move_earn') (`coeff_earn_move') (`coeff_act') (`coeff_obs') (`bias');

* Restrict to non-sero earnings (actual and observed), but use logs;
reg learn_actual treat, robust ;
local coeff_act = _b[treat] ;
reg learn_observed treat, robust ;
local coeff_obs = _b[treat] ;
local bias = `coeff_obs' - `coeff_act' ;
*post `coef' (0) ("Earn > 0") ("Log Mean") ("Endogenous")  (`beta_in_treat') (`beta_out_treat') (`sigma_out_treat')  (`move_share') (`dont_work_share') (`coeff_move_treat') (`coeff_move_earn') (`coeff_earn_move') (`coeff_act') (`coeff_obs') (`bias');

* Quantile regressions with log earnings;
qreg learn_actual treat, q(10)  ;
local coeff_act = _b[treat] ;
qreg learn_observed treat, q(10)  ;
local coeff_obs = _b[treat] ;
local bias = `coeff_obs' - `coeff_act' ;
*post `coef' (0) ("Earn > 0") ("Log p10") ("Endogenous")  (`beta_in_treat') (`beta_out_treat') (`sigma_out_treat')  (`move_share') (`dont_work_share') (`coeff_move_treat') (`coeff_move_earn') (`coeff_earn_move') (`coeff_act') (`coeff_obs') (`bias');


qreg learn_actual treat, q(50) ;
local coeff_act = _b[treat] ;
qreg learn_observed treat, q(50) ;
local coeff_obs = _b[treat] ;
local bias = `coeff_obs' - `coeff_act' ;
*post `coef' (0) ("Earn > 0") ("Log p50") ("Endogenous")  (`beta_in_treat') (`beta_out_treat')  (`sigma_out_treat') (`move_share') (`dont_work_share') (`coeff_move_treat') (`coeff_move_earn') (`coeff_earn_move') (`coeff_act') (`coeff_obs') (`bias');


qreg learn_actual treat, q(90) ;
local coeff_act = _b[treat] ;
qreg learn_observed treat, q(90) ;
local coeff_obs = _b[treat] ;
local bias = `coeff_obs' - `coeff_act' ;
*post `coef' (0) ("Earn > 0") ("Log p90") ("Endogenous")  (`beta_in_treat') (`beta_out_treat')  (`sigma_out_treat') (`move_share') (`dont_work_share') (`coeff_move_treat') (`coeff_move_earn') (`coeff_earn_move') (`coeff_act') (`coeff_obs') (`bias');


* Now Implement Lee bounds approach;

* Trim the top and bottom X% of the earnings distribution for the group (treatment or control) that has the lowest attrition rate;
capture drop earn_observed_pos;
gen earn_observed_pos = earn_observed;
replace earn_observed_pos = . if earn_observed == 0;

capture drop trim_top;
capture drop trim_bottom;
gen trim_top = 0;
gen trim_bottom = 0;
tab treat, sum(earn_observed_pos);

* Calculate the response rate for treatment and control group, as well as the number of observations with non-missing outcome data;
sum work_observed if treat == 1;
local response_treat = r(mean);
sum earn_observed_pos if treat == 1;
local nobs_treat_pos = r(N);

sum work_observed if treat == 0;
local response_control = r(mean);
sum earn_observed_pos if treat == 0;
local nobs_control_pos = r(N);

local response_diff =  `response_treat' - `response_control';

display " ";
display "======== Treatment group response rate: `response_treat' ========";
display "======== Control group response rate: `response_control'  ========";
display "======== Diff in response rate: `response_diff' ========";
display " ";

/*
* Calculate the share to trim and create indicators for whether a given observation should
* be trimmed at the top and bottome of the earnings distribution
* Need to do it separately for the two cases of the treatment or control group having higher attrition rates
*/

* Control group has higher attrition;
* Will trim the treatment group;
if `response_diff' > 0 { ;
	local trim_share = abs(`response_diff')/`response_treat';
	display "======== Trimming `trim_share' of treatment group ========";
	
	* Sort in decending rank by observed earnings ;
	gsort -treat -work_observed -earn_observed_pos;
	* Indicator for being in the top X percent of this distribution and in the treatment group;
	replace trim_top = 1 if _n/`nobs_treat_pos' < `trim_share';

	* Sort in ascending rank by observed earnings;
	gsort -treat -work_observed +earn_observed_pos ;
	* Indicator for being in the bottom X percent of this distribution and in the treatment group;
	replace trim_bottom = 1 if _n/`nobs_treat_pos' < `trim_share';
	};

* Treatment group has higher attrition;
* Will trim the control group;
if `response_diff' < 0 { ;
	local trim_share = abs(`response_diff')/`response_control';
	display "======== Trimming `trim_share' of control group ======";

	* Sort in decending rank by observed earnings ;
	gsort +treat -work_observed -earn_observed_pos ;
	* Indicator for being in the top X percent of this distribution and in the control group;
	replace trim_top = 1 if _n/`nobs_control_pos' < `trim_share';

	* Sort in ascending rank by observed earnings;
	gsort +treat -work_observed +earn_observed_pos ;
	* Indicator for being in the bottom X percent of this distribution and in the control group;
	replace trim_bottom = 1 if _n/`nobs_control_pos' < `trim_share';
	};
	
	*twoway hist earn_observed if treat==0, freq fc(none) w(500) || hist earn_observed if treat==0 & trim_top != 1 , freq lc(none) fc(blue) w(500)||, scheme(s1mono);
	*twoway hist earn_observed if treat==0, freq fc(none) w(500) || hist earn_observed if treat==0 & trim_bottom != 1 , freq lc(none) fc(blue) w(500)||, scheme(s1mono);

* Now obs to trim if monotonicity fails and can't tell from which margin the missing observations come from;
gen trim_cont_top = 0;
gen trim_cont_bottom = 0;
gen trim_treat_top = 0;
gen trim_treat_bottom = 0;

local trim_share_cont = (1-`response_treat')/(`response_control');
local trim_share_treat = (1-`response_control')/(`response_treat');
display(" ");
display "======== Monot failure requires trimming `trim_share_cont' of control group AND ======";
display "======== Monot failure requires trimming `trim_share_treat' of treatment group ======";
display(" ");

	* Trimming the treatment group; 
	* Sort in decending rank by observed earnings ;
	gsort -treat -work_observed -earn_observed_pos;
	* Indicator for being in the top X percent of this distribution and in the treatment group;
	replace trim_treat_top = 1 if _n/`nobs_treat_pos' < `trim_share_treat';

	* Sort in ascending rank by observed earnings;
	gsort -treat -work_observed +earn_observed_pos ;
	* Indicator for being in the bottom X percent of this distribution and in the treatment group;
	replace trim_treat_bottom = 1 if _n/`nobs_treat_pos' < `trim_share_treat';
	
	* Trimming the control group;
	* Sort in decending rank by observed earnings ;
	gsort +treat -work_observed -earn_observed_pos ;
	* Indicator for being in the top X percent of this distribution and in the control group;
	replace trim_cont_top = 1 if _n/`nobs_control_pos' < `trim_share_cont';

	* Sort in ascending rank by observed earnings;
	gsort +treat -work_observed +earn_observed_pos ;
	* Indicator for being in the bottom X percent of this distribution and in the control group;
	replace trim_cont_bottom = 1 if _n/`nobs_control_pos' < `trim_share_cont';
	
	

* Regressions in Levels;
* Naive model: Regression for entire sample, treating missing as zero;
reg earn_actual treat;
reg earn_observed treat;

gen earn_observed_addzeros_top = earn_observed;
replace earn_observed_addzeros_top = 0 if trim_top == 1;
reg earn_observed_addzeros_top treat;

gen earn_observed_addzeros_bottom= earn_observed;
replace earn_observed_addzeros_bottom= 0 if trim_bottom == 1;
reg earn_observed_addzeros_bottom treat;

drop earn_observed_addzeros_top earn_observed_addzeros_bottom;

xtile ability_group = ability, nquantiles(20);
xtile movecost_group = move_cost, nquantiles(20);

* Base model: Regression for entire sample with positive earnings;
* This will match our regression above where we explicitly restrict sample to those with earn_observed > 0;
reg earn_actual treat if earn_actual > 0;
reg earn_observed treat if earn_observed > 0, robust ;
reg earn_observed_pos treat;

* Upper bound: trim the top X%;
reg earn_observed_pos treat if trim_top != 1;

* Lower bound: trim the bottom X%;
reg earn_observed_pos treat if trim_bottom != 1;

* Lee Bounds Stata routine;
leebounds earn_observed_pos treat ;
leebounds earn_observed_pos treat, tight(ability_group);
leebounds earn_observed_pos treat, tight(movecost_group);

* Regressions in Logs;

* Restrict to non-sero earnings (actual and observed), but use logs;
reg learn_actual treat, robust ;
reg learn_observed treat, robust ;

* Upper bound: trim the top X%;
reg learn_observed treat if trim_top != 1;

* Lower bound: trim the bottom X%;
reg learn_observed treat if trim_bottom != 1;

* Lee Bounds Stata routine;
leebounds learn_observed treat ;
leebounds learn_observed treat, tight(ability_group);
leebounds learn_observed treat, tight(movecost_group);

* Quantile regressions with log earnings;
qreg learn_actual treat, q(10)  ;
qreg learn_observed treat, q(10)  ;
qreg learn_observed treat if trim_top != 1, q(10)  ;
qreg learn_observed treat if trim_bottom != 1, q(10)  ;

qreg learn_actual treat, q(50)  ;
qreg learn_observed treat, q(50)  ;
qreg learn_observed treat if trim_top != 1, q(50)  ;
qreg learn_observed treat if trim_bottom != 1, q(50)  ;

qreg learn_actual treat, q(90) ;
qreg learn_observed treat, q(90)  ;
qreg learn_observed treat if trim_top != 1, q(90)  ;
qreg learn_observed treat if trim_bottom != 1, q(90)  ;

#delimit ;
* Bounding with monotonicity assumption failure;
* Upper bound: trim the top X% of control group and bottom X% of treatment group;
reg earn_observed_pos treat if trim_cont_top != 1 & trim_treat_bottom != 1;

* Lower bound: trim the bottom X% of control group and top X% of treatment group;
reg earn_observed_pos treat if trim_cont_bottom != 1 & trim_treat_top != 1;


* Upper bound: trim the top X% of control group and bottom X% of treatment group;
reg learn_observed treat if trim_cont_top != 1 & trim_treat_bottom != 1;

* Lower bound: trim the bottom X% of control group and top X% of treatment group;
reg learn_observed treat if trim_cont_bottom != 1 & trim_treat_top != 1;


}; 
* End of Lee Bounds commands;


**********************  End of Program *********************************************;

log close ;



